This notebook illustrates fitting a Gaussian process for daily average Nitrous Oxide levels with data from the Liverpool site from date 1 onwards.
# Import external packages
import numpy as np
import pandas as pd
# Import temporal Gaussian process class
from stgp.TGP import TGP
# Load air quality dataset
df = pd.read_csv('..\Data processing\Daily avg air quality data 21-04-2020 to 2021 with site \
data.csv')
print(df.shape)
print(df.head())
(16104, 11)
Site Lat.South Long.East Alt.AHD Date PM10.24h.mcrg.m3 \
0 ALBURY -36.05183 146.93980 182 1 NaN
1 BARGO -34.30630 150.58060 365 1 NaN
2 BATHURST -33.40192 149.57459 625 1 NaN
3 BERESFIELD -32.79679 151.66102 14 1 NaN
4 BRADFIELD HIGHWAY -33.84343 151.21142 36 1 NaN
CO.24h.ppm SO2.24h.pphm NO.24h.pphm NO2.24h.pphm OZONE.24h.pphm.1
0 NaN NaN NaN NaN NaN
1 NaN 0.1 1.2 1.0 1.3
2 NaN NaN NaN NaN NaN
3 NaN 0.3 0.9 1.0 1.0
4 0.2 0.2 5.3 2.7 0.5
# Set parameters
target_var = 'NO2.24h.pphm'
target_name = "Nitrous Oxide"
date_low = 1
site = "LIVERPOOL"
cov_func_name = "Squared exponential"
# Create Temporal Gaussian process object
tgpt2 = TGP(df, target_var, target_name, site, date_low, cov_func_name)
# Fit to data
tgpt2.fit()
# Plot predictions
tgpt2.plot_preds()
GP Optimising Hyper-parameters Initial hyperparameter values: [0.949 0.01 0.316] Optimisation results: [0.657 0.861 0.837] Log marginal likelihood [-453.64005731] 100.0% of data within 95% confidence interval
# Load temporal hyperparameter samples from HMC
thphmcdf = pd.read_csv('..\MCMC\Liverpool_temporal_hyperparameters.csv', header = None).T
thphmcdf.columns = ["sigma_f", "l", "sigma_n"]
print(thphmcdf.shape)
print(thphmcdf.head())
(1000, 3)
sigma_f l sigma_n
0 0.630357 0.716565 0.629957
1 0.835544 0.688794 0.731959
2 0.780955 0.682712 0.649670
3 0.778444 0.635120 0.757471
4 0.518603 0.456853 0.721014
# Create Temporal Gaussian process object
tgphmc = TGP(df, target_var, target_name, site, date_low, cov_func_name)
# Load temporal hyperparameter samples from HMC
tgphmc.load_hp_samps(thphmcdf)
# Set Gaussian process to use posterior means of hyperparameters
tgphmc.set_post_mean()
tgphmc.plot_preds()
Posterior means of hyperparameters: sigma_f 0.858700 l 0.884653 sigma_n 0.704946 dtype: float64 100.0% of data within 95% confidence interval
# Set Gaussian process to use maximum a posteriori hyperparameter values
tgphmc.set_max_post()
[0.81227503 0.43141649 0.70402034] Maximum a posteriori estimates of hyperparameters: ['sigma_f' 'l' 'sigma_n'] [0.81227503 0.43141649 0.70402034]
# Plot predictions from MAP hyperparameter estimates
tgphmc.plot_preds()
100.0% of data within 95% confidence interval
tgphmc.plot_post()
Sampling posterior predictive distribution sample 1 sample 101 sample 201 sample 301 sample 401 99.0% of data within 95% credible interval Sampling posterior predictive distribution sample 1 sample 101 sample 201 sample 301 sample 401
# Load temporal hyperparameter samples from VI
thpvidf = pd.read_csv('..\VI\VI_1D.csv', header = 0)
thpvidf.columns = ["l", "sigma_f", "sigma_n"]
thpvidf = thpvidf[["sigma_f", "l", "sigma_n"]]
print(thpvidf.shape)
print(thpvidf.head())
(1000, 3)
sigma_f l sigma_n
0 1.026216 1.066208 0.966962
1 0.872762 1.049733 1.111166
2 0.895601 0.898507 0.896256
3 0.942368 0.923808 1.014475
4 0.958656 1.038754 0.916041
# Create Temporal Gaussian process object
tgpvi = TGP(df, target_var, target_name, site, date_low, cov_func_name)
# Load temporal hyperparameter samples from HMC
tgpvi.load_hp_samps(thpvidf)
# Set Gaussian process to use posterior means of hyperparameters
tgpvi.set_post_mean()
tgpvi.plot_preds()
Posterior means of hyperparameters: sigma_f 1.002107 l 1.004513 sigma_n 0.993568 dtype: float64 100.0% of data within 95% confidence interval
# Set Gaussian process to use maximum a posteriori hyperparameter values
tgpvi.set_max_post()
[0.98021631 0.97702725 0.97536428] Maximum a posteriori estimates of hyperparameters: ['sigma_f' 'l' 'sigma_n'] [0.98021631 0.97702725 0.97536428]
# Plot predictions from MAP hyperparameter estimates
tgpvi.plot_preds()
100.0% of data within 95% confidence interval
tgpvi.plot_post()
Sampling posterior predictive distribution sample 1 sample 101 sample 201 sample 301 sample 401 100.0% of data within 95% credible interval Sampling posterior predictive distribution sample 1 sample 101 sample 201 sample 301 sample 401